#### collect legislation
#' Download EU legislation of one resource type and write each act to disk
#'
#' Queries the metadata for `type` with `eurlex::elx_make_query()` /
#' `eurlex::elx_run_query()`, keeps the distinct
#' (work, celex, date, dateforce, dateendvalid) combinations that have no
#' missing value, and for each of them fetches the text and title with
#' `eurlex::elx_fetch_data()`. Every act whose text could be fetched is written
#' to `<project root>/data/legislation/<type>/<i>.txt`, where `i` is the row
#' index after sorting by `date`. Each list field is written as one line via
#' `as.character()`.
#'
#' @param type Resource type passed as first argument to
#'   `eurlex::elx_make_query()`; also used as the output sub-directory name and
#'   stored in every record.
#' @return The path of the directory the files were written to
#'   (`here::here("data", "legislation", type)`).
collect_legislation <- function(type) {
  legismat <- eurlex::elx_run_query(
    eurlex::elx_make_query(
      type,
      include_force = FALSE,
      include_date = TRUE,
      include_directory = TRUE,
      include_date_force = TRUE,
      include_date_endvalid = TRUE,
      include_eurovoc = TRUE,
      include_sector = TRUE
    )
  )

  # One row per act: the query result repeats an act once per
  # eurovoc/sector/directory value. Rows with a missing value in any of the
  # five key columns are dropped.
  tmp_mat <- legismat |>
    dplyr::select(work, celex, date, dateforce, dateendvalid) |>
    unique() |>
    na.omit() |>
    dplyr::arrange(date)
  tmp_mat$year <- lubridate::year(tmp_mat$date)

  uri <- paste0("http://publications.europa.eu/resource/celex/", tmp_mat$celex)

  # The original returned here::here("legislation", type) although the files
  # are written below "data/"; return the directory that is actually used.
  out_dir <- here::here("data", "legislation", type)
  # writeLines() cannot create missing parent directories.
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # Fetch one field of a document; log the failure and yield NULL on error.
  fetch_or_null <- function(url, what) {
    tryCatch(
      eurlex::elx_fetch_data(url, type = what),
      error = function(e) {
        message("Error: ", conditionMessage(e))
        NULL
      }
    )
  }

  # Iterate over rows, not over `uri`: paste0() turns a zero-length celex
  # vector into a single bare prefix string, which would otherwise be fetched.
  for (i in seq_len(nrow(tmp_mat))) {
    # Text first: acts without text are skipped, so no need to request a title.
    text <- fetch_or_null(uri[i], "text")
    if (is.null(text)) {
      next
    }
    title <- fetch_or_null(uri[i], "title")

    # stringr is vectorised, so no per-element apply is needed.
    text <- text |>
      stringr::str_replace_all("•", " ") |>
      stringr::str_remove_all("---pagebreak---") |>
      stringr::str_remove_all("Official Journal of the European Communities") |>
      stringr::str_replace_all("\n", " ") |>
      stringr::str_squish()

    # which() discards comparisons that evaluate to NA (missing dates in
    # legismat); a raw logical index would add all-NA rows to add_info.
    hits <- which(
      legismat$celex == tmp_mat$celex[i] &
        legismat$work == tmp_mat$work[i] &
        legismat$date == tmp_mat$date[i] &
        legismat$dateforce == tmp_mat$dateforce[i] &
        legismat$dateendvalid == tmp_mat$dateendvalid[i]
    )
    add_info <- legismat[hits, ]

    # Field order determines the line order of the output file.
    out <- list(
      title = title,
      date = tmp_mat$date[i],
      year = tmp_mat$year[i],
      type = type,
      celex = tmp_mat$celex[i],
      work = tmp_mat$work[i],
      url = uri[i],
      date_force = tmp_mat$dateforce[i],
      date_endvalid = tmp_mat$dateendvalid[i]
    )
    out$sector <- unlist(unique(add_info$sector))
    out$eurovoc <- unlist(unique(add_info$eurovoc))
    out$directory <- unlist(unique(add_info$directory))
    out$text <- unique(text)

    writeLines(as.character(out), file.path(out_dir, paste0(i, ".txt")))
  }

  out_dir
}

